<!DOCTYPE html>












  


<html class="theme-next mist use-motion" lang="zh-CN">
<head><meta name="generator" content="Hexo 3.8.0">
  <meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">


  <!-- DaoVoice widget loader: defines a global `daovoice` stub that queues its
       call arguments, inserts an async <script> for the widget (using the page's
       http/https protocol) before the first script in the document, then queues
       the `init` call (with the app id) and an `update` call. -->
  <script>
  (function(i,s,o,g,r,a,m){i["DaoVoiceObject"]=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;a.charset="utf-8";m.parentNode.insertBefore(a,m)})(window,document,"script",('https:' == document.location.protocol ? 'https:' : 'http:') + "//widget.daovoice.io/widget/0f81ff2f.js","daovoice")
  daovoice('init', {
      app_id: "15fc8e87"
    });
  daovoice('update');
  </script>






  
  
    
    
  <script src="/lib/pace/pace.min.js?v=1.0.2"></script>
  <link href="/lib/pace/pace-theme-minimal.min.css?v=1.0.2" rel="stylesheet">







<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">






















<!-- Stylesheets: Font Awesome first, then the theme's main stylesheet. -->
<link rel="stylesheet" href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2">
<link rel="stylesheet" href="/css/main.css?v=6.3.0">

<!-- Site icons (touch icon, PNG favicons, pinned-tab mask icon). -->
<link rel="apple-touch-icon" href="/images/apple-touch-icon-next.png?v=6.3.0" sizes="180x180">
<link rel="icon" href="/images/favicon-32x32-next.png?v=6.3.0" type="image/png" sizes="32x32">
<link rel="icon" href="/images/favicon-16x16-next.png?v=6.3.0" type="image/png" sizes="16x16">
<link rel="mask-icon" href="/images/logo.svg?v=6.3.0" color="#222">









<script id="hexo.configurations">
  // Global NexT namespace and the theme configuration object (global CONFIG).
  var NexT = window.NexT || {};
  var CONFIG = {
    root: '/',
    scheme: 'Mist',
    version: '6.3.0',

    // Sidebar placement and behaviour.
    sidebar: {
      position: "left",
      display: "post",
      offset: 12,
      b2t: true,
      scrollpercent: true,
      onmobile: false
    },

    // Optional feature switches.
    fancybox: false,
    fastclick: false,
    lazyload: false,
    tabs: true,

    // Entry animations and the transition used for each page region.
    motion: {
      enable: true,
      async: false,
      transition: {
        post_block: "fadeIn",
        post_header: "slideDownIn",
        post_body: "slideDownIn",
        coll_header: "slideLeftIn",
        sidebar: "slideUpIn"
      }
    },

    // Algolia search settings (credentials are empty in this build).
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: { per_page: 10 },
      labels: {
        input_placeholder: "Search for Posts",
        hits_empty: "We didn't find any results for the search: ${query}",
        hits_stats: "${hits} results found in ${time} ms"
      }
    }
  };
</script>


  




  <meta name="description" content="使用Apache的开源项目Tika来抽取各种类型文件的文本内容及MetaData">
<meta name="keywords" content="apache,tika,web,抽取文本内容">
<meta property="og:type" content="article">
<meta property="og:title" content="Apache TIKA---抽取多类型文件文本内容和文件的隐藏信息">
<meta property="og:url" content="http://blog.wangxc.club/2017/05/08/Apache-TIKA-抽取多类型文件文本内容和文件的隐藏信息/index.html">
<meta property="og:site_name" content="冬与晨">
<meta property="og:description" content="使用Apache的开源项目Tika来抽取各种类型文件的文本内容及MetaData">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-0d3c151f5d34f9ff.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-0f4b8c9ec0ecfac0.gif?imageMogr2/auto-orient/strip">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-5606fa60fadcf365.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-a96d28392ed317a8.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-7d08685f66e9ea63.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:updated_time" content="2018-06-29T13:58:50.298Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Apache TIKA---抽取多类型文件文本内容和文件的隐藏信息">
<meta name="twitter:description" content="使用Apache的开源项目Tika来抽取各种类型文件的文本内容及MetaData">
<meta name="twitter:image" content="http://upload-images.jianshu.io/upload_images/3167229-0d3c151f5d34f9ff.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">






  <link rel="canonical" href="http://blog.wangxc.club/2017/05/08/Apache-TIKA-抽取多类型文件文本内容和文件的隐藏信息/">



<script id="page.configurations">
  // Per-page settings attached to the global CONFIG; this page sets an empty sidebar value.
  CONFIG.page = { sidebar: "" };
</script>

  <title>Apache TIKA---抽取多类型文件文本内容和文件的隐藏信息 | 冬与晨</title>
  




<!-- Google gtag.js: loads the library asynchronously for property UA-102769182-1,
     then defines `gtag`, which pushes its `arguments` object onto the global
     `dataLayer` array, and queues the initial `js` (timestamp) and `config` calls. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-102769182-1"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'UA-102769182-1');
</script>



  <script>
    // Baidu analytics loader: keeps the global _hmt queue and injects hm.js
    // ahead of the first <script> element in the document.
    var _hmt = _hmt || [];
    (function () {
      var firstScript = document.getElementsByTagName("script")[0];
      var tracker = document.createElement("script");
      tracker.src = "https://hm.baidu.com/hm.js?809d5610fca8eaff344d9a698536110d";
      firstScript.parentNode.insertBefore(tracker, firstScript);
    })();
  </script>




  <noscript>
  <style>
    /* No-JavaScript fallback: reset the properties below to their initial
       values on elements under .use-motion (presumably the ones the motion
       scripts would otherwise reveal), so the page is readable without JS. */
    .use-motion .motion-element,
    .use-motion .brand,
    .use-motion .menu-item,
    .sidebar-inner,
    .use-motion .post-block,
    .use-motion .pagination,
    .use-motion .comments,
    .use-motion .post-header,
    .use-motion .post-body,
    .use-motion .collection-title { opacity: initial; }

    .use-motion .logo,
    .use-motion .site-title,
    .use-motion .site-subtitle {
      opacity: initial;
      top: initial;
    }

    /* Written as flat descendant selectors: the previous nested form
       (.use-motion { .logo-line-before i { ... } }) is ignored by browsers
       that do not implement CSS nesting. */
    .use-motion .logo-line-before i { left: initial; }
    .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>

</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail">
    <div class="headband">
    </div>
<a href="https://github.com/vector4wang"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png" alt="Fork me on GitHub"></a>
    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/" class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">冬与晨</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
    
  </div>

  <div class="site-nav-toggle">
    <button aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>



<nav class="site-nav">
  
    <ul id="menu" class="menu">
      
        
        
        
          
          <li class="menu-item menu-item-home">
    <a href="/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-home"></i> <br>首页</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-about">
    <a href="/about/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-user"></i> <br>关于</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-tags">
    <a href="/tags/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-tags"></i> <br>标签</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-categories">
    <a href="/categories/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-th"></i> <br>分类</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-archives">
    <a href="/archives/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-archive"></i> <br>归档</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-commonweal">
    <a href="/404/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-heartbeat"></i> <br>公益 404</a>
  </li>

      
      
    </ul>
  

  
    

  

  
</nav>



  



</div>
    </header>

    


    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://blog.wangxc.club/2017/05/08/Apache-TIKA-抽取多类型文件文本内容和文件的隐藏信息/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="wangxc">
      <meta itemprop="description" content="革命尚未成功，同志还需努力~">
      <meta itemprop="image" content="/images/avatar.jpg">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="冬与晨">
    </span>

    
      <header class="post-header">

        
        
          <h2 class="post-title" itemprop="name headline">Apache TIKA---抽取多类型文件文本内容和文件的隐藏信息
              
            
          </h2>
        

        <div class="post-meta">
          <span class="post-time">

            
            
            

            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              

              
                
              

              <time title="创建时间：2017-05-08 21:26:57" itemprop="dateCreated datePublished" datetime="2017-05-08T21:26:57+08:00">2017-05-08</time>
            

            
              

              
                
                <span class="post-meta-divider">|</span>
                

                <span class="post-meta-item-icon">
                  <i class="fa fa-calendar-check-o"></i>
                </span>
                
                  <span class="post-meta-item-text">更新于</span>
                
                <time title="修改时间：2018-06-29 21:58:50" itemprop="dateModified" datetime="2018-06-29T21:58:50+08:00">2018-06-29</time>
              
            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing"><a href="/categories/技术/" itemprop="url" rel="index"><span itemprop="name">技术</span></a></span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/2017/05/08/Apache-TIKA-抽取多类型文件文本内容和文件的隐藏信息/#comments" itemprop="discussionUrl">
                  <span class="post-meta-item-text">评论数：</span> <span class="post-comments-count valine-comment-count" data-xid="/2017/05/08/Apache-TIKA-抽取多类型文件文本内容和文件的隐藏信息/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <p>使用Apache的开源项目Tika来抽取各种类型文件的文本内容及MetaData</p>
<a id="more"></a>

<!-- Embedded NetEase Cloud Music player (music.163.com outchain widget). -->
<iframe title="网易云音乐播放器" frameborder="no" border="0" marginwidth="0" marginheight="0" width="330" height="86" src="https://music.163.com/outchain/player?type=2&amp;id=337167&amp;auto=1&amp;height=66"></iframe>

<h3 id="前言"><a href="#前言" class="headerlink" title="前言"></a>前言</h3><p>有这样一个需求“用户上传一个文件，要得到这个文件的文本内容，和它的创建时间(用户创建的时间)”<br>乍一看上去，很简单啊，可以按字节读文件或按行读文件，也可以根据文件的类型引入对应的jar包去获取内容。文件的创建时间，我找了一些资料，可以通过下面代码实现：</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line">Process p = Runtime.getRuntime().exec(<span class="string">"cmd /C dir "</span>           </span><br><span class="line">                    + filePath  </span><br><span class="line">                    + <span class="string">"/tc"</span> );  </span><br><span class="line">InputStream is = p.getInputStream();   </span><br><span class="line">BufferedReader br = <span class="keyword">new</span> BufferedReader(<span class="keyword">new</span> InputStreamReader(is));             </span><br><span class="line">String line;  </span><br><span class="line"><span class="keyword">while</span>((line = br.readLine()) != <span class="keyword">null</span>)&#123;  </span><br><span class="line">    <span class="keyword">if</span>(line.endsWith(<span class="string">".txt"</span>))&#123;  </span><br><span class="line">        strTime = line.substring(<span class="number">0</span>,<span class="number">17</span>);  </span><br><span class="line">        <span class="keyword">break</span>;  </span><br><span class="line">     &#125;                             </span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>到了这里，我正打算去实现，转念一想，Apache的工具包里有提供文件复制的功能，即<code>IOUtils.copy(InputStream input, OutputStream output)</code>，那同样的Apache是否拥有提取文件内容和时间的工具类呢，还真有！这就是接下来要说的<strong>Apache Tika</strong></p>
<h3 id="Tika"><a href="#Tika" class="headerlink" title="Tika"></a>Tika</h3><blockquote>
<p>The Apache Tika™ toolkit detects and extracts metadata and text from over a thousand different file types (such as PPT, XLS, and PDF). All of these file types can be parsed through a single interface, making Tika useful for search engine indexing, content analysis, translation, and much more. You can find the latest release on the download page. Please see the Getting Started page for more information on how to start using Tika.</p>
</blockquote>
<p><a href="http://tika.apache.org/" target="_blank" rel="noopener">官方地址</a></p>
<p>官方介绍说它可以支持上千种文件类型，同时支持搜索引擎索引、内容分析、翻译。。。看到这，感觉我又发现了新大陆！！！<br>下面这张图是tika的主要结构图：<br><img src="http://upload-images.jianshu.io/upload_images/3167229-0d3c151f5d34f9ff.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="tika_architecture.jpg"></p>
<p>tika会根据你输入的文件<strong>自动</strong>分配解析工具，如<code>PDFParser</code>、<code>OOXMLParser</code>和<code>HtmlParser</code>等等，然后解析语言、MIME type、文本内容和Metadata</p>
<p>我这里使用到Tika两个功能：</p>
<ul>
<li>文本内容抽取</li>
<li>元数据(MetaData)的获取<br>补充：MetaData即描述数据的数据！如一个文件的创建者，创建时间，文档类型，字节数等等。<a href="https://en.wikipedia.org/w/index.php?title=Metadata&amp;action=history" target="_blank" rel="noopener">维基百科</a></li>
</ul>
<p>这里是一个简单教程，能让你快速了解并使用其功能<a href="http://www.yiibai.com/tika/" target="_blank" rel="noopener">http://www.yiibai.com/tika/</a></p>
<h3 id="使用"><a href="#使用" class="headerlink" title="使用"></a>使用</h3><p>Tika是一个开源的项目，代码在这<a href="https://github.com/apache/tika" target="_blank" rel="noopener">https://github.com/apache/tika</a>，它提供了多个服务，有图形界面的也有Http服务</p>
<h4 id="GUI"><a href="#GUI" class="headerlink" title="GUI"></a>GUI</h4><p>将项目checkout到本地并执行<code>mvn clean install</code>，然后进入tika-app目录，执行</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">java -jar tika-app-1.4.jar --gui</span><br></pre></td></tr></table></figure>

<p>如下动图</p>
<p><img src="http://upload-images.jianshu.io/upload_images/3167229-0f4b8c9ec0ecfac0.gif?imageMogr2/auto-orient/strip" alt="gui"></p>
<h4 id="Server"><a href="#Server" class="headerlink" title="Server"></a>Server</h4><p>关于官方指定使用<code>curl</code>，我就在ubuntu上启动了server，启动的方式为</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">java -jar tika-server/target/tika-server.jar</span><br></pre></td></tr></table></figure>

<p>测试了一个文件，如下图</p>
<p><img src="http://upload-images.jianshu.io/upload_images/3167229-5606fa60fadcf365.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="server.png"></p>
<p>还有其他的方法，如：</p>
<ul>
<li><p>Extract plain text:<br><code>curl -T price.xls http://localhost:9998/tika</code></p>
</li>
<li><p>Extract text with mime-type hint:<br><code>curl -v -H &quot;Content-type: application/vnd.openxmlformats-officedocument.wordprocessingml.document&quot; -T document.docx http://localhost:9998/tika</code></p>
</li>
<li><p>Get all document attachments as ZIP-file:<br><code>curl -v -T Doc1_ole.doc http://localhost:9998/unpacker &gt; /var/tmp/x.zip</code></p>
</li>
<li><p>Extract metadata to CSV format:<br><code>curl -T price.xls http://localhost:9998/meta</code></p>
</li>
<li><p>Detect media type from CSV format using file extension hint:<br><code>curl -X PUT -H &quot;Content-Disposition: attachment; filename=foo.csv&quot; --upload-file foo.csv http://localhost:9998/detect/stream</code></p>
</li>
</ul>
<p>可根据自己的需要去调用</p>
<h3 id="整合本地服务"><a href="#整合本地服务" class="headerlink" title="整合本地服务"></a>整合本地服务</h3><p>虽然tika自己提供了服务，但是有的时候，我们想在本地自己搭建一个服务，在抽取完内容后或许还要做些其他的工作。就这个目的，我将1.14版本的tika整合到了本地，并自己搭建了一个简单的服务，接口如下</p>
<p><img src="http://upload-images.jianshu.io/upload_images/3167229-a96d28392ed317a8.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="1.png"></p>
<p><img src="http://upload-images.jianshu.io/upload_images/3167229-7d08685f66e9ea63.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="2.png"></p>
<p>依然使用swagger整理接口，如果你对swagger不了解，可以<a href="http://www.jianshu.com/p/5bc11edca2d1" target="_blank" rel="noopener">点这里</a>,代码放在了github，欢迎<a href="https://github.com/vector4wang/springbootquick/tree/tika" target="_blank" rel="noopener">查看</a></p>

      
    </div>

    

    
    
    

    <div>
          
            
          
    </div>

    

    
      <div>
        <div style="padding: 10px 0; margin: 20px auto; width: 90%; text-align: center;">
  <div></div>
  <!-- Reward button: toggles the #QR panel (payment QR codes) between hidden and shown. -->
  <button id="rewardButton" type="button" aria-controls="QR" onclick="var qr = document.getElementById('QR'); if (qr.style.display === 'none') {qr.style.display='block';} else {qr.style.display='none'}">
    <span>打赏</span>
  </button>
  <div id="QR" style="display: none;">

    
      <div id="wechat" style="display: inline-block">
        <img id="wechat_qr" src="https://i.loli.net/2017/12/29/5a4632028eaeb.jpg" alt="wangxc 微信支付">
        <p>微信支付</p>
      </div>
    

    
      <div id="alipay" style="display: inline-block">
        <img id="alipay_qr" src="https://i.loli.net/2017/12/29/5a46320234b1d.jpg" alt="wangxc 支付宝">
        <p>支付宝</p>
      </div>
    

    

  </div>
</div>

      </div>
    

    
<div>
  
    <div>
    
        <div style="text-align:center;color: #ccc;font-size:14px;">
              -------------本文结束<i class="fa fa-paw"></i>感谢您的阅读-------------
        </div>
    
</div>

  
</div>
    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/apache/" rel="tag"><i class="fa fa-tag"></i> apache</a>
          
            <a href="/tags/tika/" rel="tag"><i class="fa fa-tag"></i> tika</a>
          
            <a href="/tags/web/" rel="tag"><i class="fa fa-tag"></i> web</a>
          
            <a href="/tags/抽取文本内容/" rel="tag"><i class="fa fa-tag"></i> 抽取文本内容</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2017/05/04/如何优雅的“编写”api接口文档（续）/" rel="next" title="如何优雅的“编写”api接口文档（续）">
                <i class="fa fa-chevron-left"></i> 如何优雅的“编写”api接口文档（续）
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2017/05/23/调整下心态/" rel="prev" title="调整下心态">
                调整下心态 <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          

  
    <div class="comments" id="comments">
    </div>
  



        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview-wrap">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview-wrap sidebar-panel">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <img class="site-author-image" itemprop="image" src="/images/avatar.jpg" alt="wangxc">
            
              <p class="site-author-name" itemprop="name">wangxc</p>
              <p class="site-description motion-element" itemprop="description">革命尚未成功，同志还需努力~</p>
          </div>

          
            <nav class="site-state motion-element">
              
                <div class="site-state-item site-state-posts">
                
                  <a href="/archives/">
                
                    <span class="site-state-item-count">83</span>
                    <span class="site-state-item-name">日志</span>
                  </a>
                </div>
              

              
                
                
                <div class="site-state-item site-state-categories">
                  <a href="/categories/index.html">
                    
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                    <span class="site-state-item-count">6</span>
                    <span class="site-state-item-name">分类</span>
                  </a>
                </div>
              

              
                
                
                <div class="site-state-item site-state-tags">
                  <a href="/tags/index.html">
                    
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                    <span class="site-state-item-count">124</span>
                    <span class="site-state-item-name">标签</span>
                  </a>
                </div>
              
            </nav>
          

          

          
            <!-- Author's external profile links. Each opens in a new tab, so rel
                 includes "noopener" to keep the new page from reaching window.opener. -->
            <div class="links-of-author motion-element">
              <span class="links-of-author-item">
                <a href="https://github.com/vector4wang" target="_blank" title="GitHub" rel="external nofollow noopener"><i class="fa fa-fw fa-github"></i>GitHub</a>
              </span>
              <span class="links-of-author-item">
                <a href="mailto:vector4wang@qq.com" target="_blank" title="E-Mail" rel="external nofollow noopener"><i class="fa fa-fw fa-envelope"></i>E-Mail</a>
              </span>
              <span class="links-of-author-item">
                <a href="https://www.jianshu.com/u/223a1314e818" target="_blank" title="简书" rel="external nofollow noopener"><i class="fa fa-fw fa-book"></i>简书</a>
              </span>
              <span class="links-of-author-item">
                <a href="http://blog.csdn.net/qqhjqs?viewmode=contents" target="_blank" title="CSDN" rel="external nofollow noopener"><i class="fa fa-fw fa-crosshairs"></i>CSDN</a>
              </span>
              <span class="links-of-author-item">
                <a href="https://twitter.com/BMHJQS" target="_blank" title="Twitter" rel="external nofollow noopener"><i class="fa fa-fw fa-twitter"></i>Twitter</a>
              </span>
              <span class="links-of-author-item">
                <a href="https://www.facebook.com/" target="_blank" title="FB Page" rel="external nofollow noopener"><i class="fa fa-fw fa-facebook-official"></i>FB Page</a>
              </span>
            </div>
          

          
          

          
          

          
            
          
          

        </div>
      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-3"><a class="nav-link" href="#前言"><span class="nav-number">1.</span> <span class="nav-text">前言</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#Tika"><span class="nav-number">2.</span> <span class="nav-text">Tika</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#使用"><span class="nav-number">3.</span> <span class="nav-text">使用</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#GUI"><span class="nav-number">3.1.</span> <span class="nav-text">GUI</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Server"><span class="nav-number">3.2.</span> <span class="nav-text">Server</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#整合本地服务"><span class="nav-number">4.</span> <span class="nav-text">整合本地服务</span></a></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      
        <div class="back-to-top">
          <i class="fa fa-arrow-up"></i>
          
            <span id="scrollpercent"><span>0</span>%</span>
          
        </div>
      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <script async src="https://dn-lbstatics.qbox.me/busuanzi/2.3/busuanzi.pure.mini.js"></script>
<div class="copyright">&copy; <span itemprop="copyrightYear">2020</span>
  <span class="with-love" id="animate">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">wangxc</span>

  

  
</div>


  


<!-- Site visitor counter filled in by the busuanzi script. The container and
     value ids both use the site_uv (unique visitors) key, matching the label. -->
<div class="powered-by">
<i class="fa fa-user-md"></i><span id="busuanzi_container_site_uv">
  本站访客数:<span id="busuanzi_value_site_uv"></span>
</span>
</div>


  <!-- Generator and theme credits. The links open in a new tab, so rel includes "noopener". -->
  <div class="powered-by">由 <a class="theme-link" target="_blank" rel="external nofollow noopener" href="https://hexo.io">Hexo</a> 强力驱动 v3.8.0</div>

  <span class="post-meta-divider">|</span>

  <div class="theme-info">主题 &mdash; <a class="theme-link" target="_blank" rel="external nofollow noopener" href="https://theme-next.org">NexT.Mist</a> v6.3.0</div>




        








        
      </div>
    </footer>

    

    

  </div>

  

<script>
  // If window.Promise is not a real function, null it out.
  (function () {
    var promiseTag = Object.prototype.toString.call(window.Promise);
    if (promiseTag !== '[object Function]') {
      window.Promise = null;
    }
  })();
</script>


























  
  
    <!-- Local libraries, loaded synchronously in this order: jQuery, then Velocity and its UI pack. -->
    <script src="/lib/jquery/index.js?v=2.1.3"></script>
    <script src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
    <script src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>

    <!-- Theme scripts, in their original load order. -->
    <script src="/js/src/utils.js?v=6.3.0"></script>
    <script src="/js/src/motion.js?v=6.3.0"></script>
    <script src="/js/src/scrollspy.js?v=6.3.0"></script>
    <script src="/js/src/post-details.js?v=6.3.0"></script>
    <script src="/js/src/bootstrap.js?v=6.3.0"></script>



  



	





  





  








  <!-- Scripts for the Valine comment widget: av-min.js 3.0.4 (presumably the
       LeanCloud storage SDK Valine relies on; confirm) followed by Valine itself.
       NOTE(review): both URLs are protocol-relative and the Valine URL is unversioned. -->
  <script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
  
  
  <script src="//unpkg.com/valine/dist/Valine.min.js"></script>
  
  <script>
    // Guest-info fields accepted for the comment form.
    var GUEST = ['nick', 'mail', 'link'];

    // Configured fields, reduced to those listed in GUEST.
    var guest = 'nick,mail,link'.split(',').filter(function (field) {
      return GUEST.indexOf(field) !== -1;
    });

    // Mount the Valine comment widget on the #comments container.
    new Valine({
      el: '#comments',
      verify: false,
      notify: false,
      appId: 'YdGFHBfuDwObik8qwCeRAWgr-gzGzoHsz',
      appKey: 'oFpbJwmMQf98DfFDUlzE5Fai',
      placeholder: 'Just go go',
      avatar: 'mm',
      guest_info: guest,
      // Evaluates to the string '10', exactly as before.
      pageSize: '10' || 10
    });
  </script>



  





  

  

  

  

  
  

  

  

  

  

  

</body>
</html>
